In [1]:
import os
## Set directory
os.chdir('/hpc/group/pbenfeylab/CheWei/CW_data/genesys')
import networkx as nx
from genesys_evaluate_v1 import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
# Suppress all warning messages
warnings.filterwarnings("ignore", category=DeprecationWarning)
/hpc/group/pbenfeylab/ch416/miniconda3/envs/genesys/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
In [2]:
## Conda Env pytorch-gpu on DCC
print(torch.__version__)
print(sc.__version__)
1.11.0 1.9.6
In [3]:
## Genes considered/used (shared among samples)
gene_list = pd.read_csv('./gene_list_1108.csv')
Load Data¶
In [4]:
with open("./genesys_root_data.pkl", 'rb') as file_handle:
data = pickle.load(file_handle)
batch_size = 2000
dataset = Root_Dataset(data['X_test'], data['y_test'])
loader = DataLoader(dataset,
batch_size = batch_size,
shuffle = True, drop_last=True)
In [5]:
input_size = data['X_train'].shape[1]
## 10 cell types
output_size = 10
embedding_dim = 256
hidden_dim = 256
n_layers = 2
device = "cpu"
path = "./"
Load trained GeneSys model¶
In [6]:
model = ClassifierLSTM(input_size, output_size, embedding_dim, hidden_dim, n_layers).to(device)
model.load_state_dict(torch.load(path+"/workstation/genesys_model_trained_on_root_atlas_20240308_continue4.pth", map_location=torch.device('cpu')))
model = model
model.eval()
Out[6]:
ClassifierLSTM(
(fc1): Sequential(
(0): Linear(in_features=17513, out_features=256, bias=True)
(1): Dropout(p=0.2, inplace=False)
(2): GaussianNoise()
)
(fc): Sequential(
(0): ReLU()
(1): Linear(in_features=512, out_features=512, bias=True)
(2): ReLU()
(3): Linear(in_features=512, out_features=10, bias=True)
)
(lstm): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
(dropout): Dropout(p=0.2, inplace=False)
(b_to_z): DBlock(
(fc1): Linear(in_features=512, out_features=256, bias=True)
(fc2): Linear(in_features=512, out_features=256, bias=True)
(fc_mu): Linear(in_features=256, out_features=512, bias=True)
(fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
)
(bz2_infer_z1): DBlock(
(fc1): Linear(in_features=1024, out_features=256, bias=True)
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(fc_mu): Linear(in_features=256, out_features=512, bias=True)
(fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
)
(z1_to_z2): DBlock(
(fc1): Linear(in_features=512, out_features=256, bias=True)
(fc2): Linear(in_features=512, out_features=256, bias=True)
(fc_mu): Linear(in_features=256, out_features=512, bias=True)
(fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
)
(z_to_x): Decoder(
(fc1): Linear(in_features=512, out_features=256, bias=True)
(fc2): Linear(in_features=256, out_features=256, bias=True)
(fc3): Linear(in_features=256, out_features=17513, bias=True)
)
)
In [7]:
classes = ['Columella', 'Lateral Root Cap', 'Phloem', 'Xylem', 'Procambium', 'Pericycle', 'Endodermis', 'Cortex', 'Atrichoblast', 'Trichoblast']
class2num = {c: i for (i, c) in enumerate(classes)}
num2class = {i: c for (i, c) in enumerate(classes)}
In [8]:
cts = ['Atrichoblast','Trichoblast','Cortex','Endodermis','Pericycle','Procambium','Xylem','Phloem','Lateral Root Cap','Columella']
ctw = np.zeros((len(cts), 17513, 17513))
## number of cells sampled from the atlas
batch_size = 2000
In [9]:
## GRN for the transition t3 to t5
for ct in cts:
print(ct)
cws = np.zeros((len(loader), 17513, 17513))
with torch.no_grad():
for i, sample in enumerate(loader):
x = sample['x'].to(device)
y = sample['y'].to(device)
y_label = [num2class[i] for i in y.tolist()]
pred_h = model.init_hidden(batch_size)
tfrom = model.generate_next(x, pred_h, 2).to('cpu').detach().numpy()
cfrom = tfrom[np.where(np.array(y_label)==ct)[0],:]
pred_h = model.init_hidden(batch_size)
tto = model.generate_next(x, pred_h, 4).to('cpu').detach().numpy()
cto = tto[np.where(np.array(y_label)==ct)[0],:]
cw = torch.linalg.lstsq(torch.tensor(cfrom), torch.tensor(cto)).solution.detach().numpy()
cws[i] = cw
## Calculate mean across number of repeats
cwm = np.mean(cws, axis=0)
ctw[cts.index(ct)] = cwm
Atrichoblast Trichoblast Cortex Endodermis Pericycle Procambium Xylem Phloem Lateral Root Cap Columella
In [10]:
# Save the array to disk
np.save('genesys_ctw_t3-t5.npy', ctw)
In [11]:
ctw = np.load('genesys_ctw_t3-t5.npy')
In [12]:
## Calculate z-scores
ctw_z = np.zeros((len(cts), 17513, 17513))
for i in range(len(cts)):
ctw_z[i] = (ctw[i] - np.mean(ctw[i])) / np.std(ctw[i])
In [13]:
## Filtering based on z-scores (with no weights)
ctw_f = np.zeros((len(cts), 17513, 17513))
## z-score threshold (keep values > mean + threshold*std)
threshold=3
for i in range(len(cts)):
ctw_f[i] = np.abs(ctw_z[i]) > threshold
Load TFs list¶
In [14]:
wanted_TFs = pd.read_csv("./Kay_TF_thalemine_annotations.csv")
In [15]:
## Make TF names unique and assign preferred names
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G33880"]="WOX9"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G45160"]="SCL27"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G04410"]="NAC78"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G29035"]="ORS1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02540"]="ZHD3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G16500"]="IAA26"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G09740"]="HAG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G24660"]="ZHD2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G46880"]="HDG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G28420"]="RLT1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G14580"]="BLJ"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G45260"]="BIB"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02070"]="RVN"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G28160"]="FIT"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G68360"]="GIS3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G20640"]="NLP4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G05550"]="VFP5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G59470"]="FRF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G15150"]="HAT7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G14750"]="WER"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G75710"]="BRON"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G74500"]="TMO7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G12646"]="RITF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G48100"]="ARR5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G16141"]="GATA17L"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G65640"]="NFL"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G62700"]="VND5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G36160"]="VND2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G66300"]="VND3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G12260"]="VND4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G62380"]="VND6"
In [16]:
pd.Series(wanted_TFs['Name']).value_counts().head(5)
Out[16]:
Name NAC001 1 PRE5 1 MYB118 1 MYB21 1 MYB0 1 Name: count, dtype: int64
Network analysis¶
In [17]:
TFidx = []
for i in wanted_TFs['GeneID']:
if i in gene_list['features'].tolist():
TFidx.append(np.where(gene_list['features']==i)[0][0])
TFidx = np.sort(np.array(TFidx))
In [18]:
def network(i):
## No weights
adj_nw = ctw_f[i]
## Weighted
adj = ctw[i]*ctw_f[i]
## TF only
adj = adj[np.ix_(TFidx,TFidx)]
adj_nw = adj_nw[np.ix_(TFidx,TFidx)]
## Remove no connect
regidx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[0]).value_counts().index[pd.Series(np.where(adj_nw==True)[0]).value_counts()>=1]))
taridx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[1]).value_counts().index[pd.Series(np.where(adj_nw==True)[1]).value_counts()>=1]))
## Reciprocol
keepidx = np.sort(np.array(list(set(regidx).intersection(taridx))))
#keepidx = np.sort(np.array(list(set(regidx).union(taridx))))
TFID = np.array(gene_list['features'][TFidx])[keepidx].tolist()
## TF name to keep
TFname = []
for i in np.array(gene_list['features'][TFidx])[keepidx]:
TFname.append(wanted_TFs['Name'][np.where(wanted_TFs['GeneID']==i)[0][0]])
adj = adj[np.ix_(keepidx,keepidx)]
# Create a NetworkX graph for non-directed edges
G = nx.Graph() # supports directed edges and allows for multiple edges between the same pair of nodes
# Add nodes to the graph
num_nodes = adj.shape[0]
for i, name in enumerate(TFname):
G.add_node(i, name=name)
# Add edges to the graph with weights
for i in range(num_nodes):
for j in range(num_nodes):
weight = adj[i, j]
if weight != 0:
G.add_edge(j, i, weight=abs(weight), distance=1/abs(weight))
## Measures the extent to which how close a node is to all other nodes in the network, considering the shortest paths or geodesic distances between nodes
closeness_centrality = nx.closeness_centrality(G, distance='distance')
## Measures the extent to which a node that are not only well-connected but also connected to other well-connected nodes.
eigenvector_centrality = nx.eigenvector_centrality(G)
# Create a NetworkX graph for diected edges
G = nx.MultiDiGraph() # supports directed edges and allows for multiple edges between the same pair of nodes
# Add nodes to the graph
num_nodes = adj.shape[0]
for i, name in enumerate(TFname):
G.add_node(i, name=name)
# Add edges to the graph with weights
for i in range(num_nodes):
for j in range(num_nodes):
weight = adj[i, j]
if weight != 0:
G.add_edge(j, i, weight=weight)
## Measures the number of connections (edges) each node has
degree_centrality = nx.degree_centrality(G)
# Calculate outgoing centrality
out_centrality = nx.out_degree_centrality(G)
# Calculate incoming centrality
in_centrality = nx.in_degree_centrality(G)
## Measures the extent to which a node lies on the shortest paths between other nodes.
betweenness_centrality = nx.betweenness_centrality(G, weight='weight')
## Non_Reciprocal Out centrality
# Visualize the graph
pos = nx.spring_layout(G) # Positions of the nodes
# Node colors based on weighted betweenness centrality
node_colors = [out_centrality[node] for node in G.nodes()]
# Node sizes based on weighted betweenness centrality
node_sizes = [out_centrality[node] * 1000 for node in G.nodes()]
# Get the edge weights as a dictionary
edge_weights = nx.get_edge_attributes(G, 'weight')
edge_colors = ['red' if weight > 0 else 'blue' for (_, _, weight) in G.edges(data='weight')]
# Scale the edge weights to desired linewidths
max_weight = max(edge_weights.values())
edge_widths = [float(edge_weights[edge]) / max_weight for edge in G.edges]
# Draw the graph
nx.draw(G, pos=pos, node_color=node_colors, node_size=node_sizes, with_labels=False, width=edge_widths, edge_color=edge_colors)
# Add node labels
labels = {node: G.nodes[node]['name'] for node in G.nodes}
nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=8)
# Add a colorbar to show the weighted betweenness centrality color mapping
sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=min(node_colors), vmax=max(node_colors)))
sm.set_array([])
plt.colorbar(sm)
# Show the plot
plt.show()
dc = pd.DataFrame.from_dict(degree_centrality, orient='index', columns=['degree_centrality'])
oc = pd.DataFrame.from_dict(out_centrality, orient='index', columns=['out_centrality'])
ic = pd.DataFrame.from_dict(in_centrality, orient='index', columns=['in_centrality'])
bc = pd.DataFrame.from_dict(betweenness_centrality, orient='index', columns=['betweenness_centrality'])
cc = pd.DataFrame.from_dict(closeness_centrality, orient='index', columns=['closeness_centrality'])
ec = pd.DataFrame.from_dict(eigenvector_centrality, orient='index', columns=['eigenvector_centrality'])
df = pd.concat([dc,oc,ic,bc,cc,ec], axis=1)
df.index =TFname
df = df.sort_values('betweenness_centrality', ascending=False)
return(df)
In [19]:
atri = network(0)
In [20]:
tri = network(1)
In [21]:
cor = network(2)
In [22]:
end = network(3)
In [23]:
per = network(4)
In [24]:
pro = network(5)
In [25]:
xyl = network(6)
In [26]:
phl = network(7)
In [27]:
lrc = network(8)
In [28]:
col = network(9)
In [29]:
atri.columns = ['atri_degree_centrality','atri_out_centrality','atri_in_centrality','atri_betweenness_centrality','atri_closeness_centrality','atri_eigenvector_centrality']
tri.columns = ['tri_degree_centrality','tri_out_centrality','tri_in_centrality','tri_betweenness_centrality','tri_closeness_centrality','tri_eigenvector_centrality']
cor.columns = ['cor_degree_centrality','cor_out_centrality','cor_in_centrality','cor_betweenness_centrality','cor_closeness_centrality','cor_eigenvector_centrality']
end.columns = ['end_degree_centrality','end_out_centrality','end_in_centrality','end_betweenness_centrality','end_closeness_centrality','end_eigenvector_centrality']
per.columns = ['per_degree_centrality','per_out_centrality','per_in_centrality','per_betweenness_centrality','per_closeness_centrality','per_eigenvector_centrality']
pro.columns = ['pro_degree_centrality','pro_out_centrality','pro_in_centrality','pro_betweenness_centrality','pro_closeness_centrality','pro_eigenvector_centrality']
xyl.columns = ['xyl_degree_centrality','xyl_out_centrality','xyl_in_centrality','xyl_betweenness_centrality','xyl_closeness_centrality','xyl_eigenvector_centrality']
phl.columns = ['phl_degree_centrality','phl_out_centrality','phl_in_centrality','phl_betweenness_centrality','phl_closeness_centrality','phl_eigenvector_centrality']
lrc.columns = ['lrc_degree_centrality','lrc_out_centrality','lrc_in_centrality','lrc_betweenness_centrality','lrc_closeness_centrality','lrc_eigenvector_centrality']
col.columns = ['col_degree_centrality','col_out_centrality','col_in_centrality','col_betweenness_centrality','col_closeness_centrality','col_eigenvector_centrality']
In [30]:
## Indentify main regulators in each net work
tff = []
tff = tff + atri[atri['atri_betweenness_centrality']>0].index.tolist()
tff = tff + tri[tri['tri_betweenness_centrality']>0].index.tolist()
tff = tff + lrc[lrc['lrc_betweenness_centrality']>0].index.tolist()
tff = tff + cor[cor['cor_betweenness_centrality']>0].index.tolist()
tff = tff + end[end['end_betweenness_centrality']>0].index.tolist()
tff = tff + per[per['per_betweenness_centrality']>0].index.tolist()
tff = tff + pro[pro['pro_betweenness_centrality']>0].index.tolist()
tff = tff + xyl[xyl['xyl_betweenness_centrality']>0].index.tolist()
tff = tff + phl[phl['phl_betweenness_centrality']>0].index.tolist()
tff = tff + col[col['col_betweenness_centrality']>0].index.tolist()
tf_occurance = pd.DataFrame(pd.Series(tff).value_counts())
tf_occurance = tf_occurance.rename(columns={
'count': 'tf_occurance'
})
tf_spec = pd.concat([tf_occurance, atri, tri, lrc, cor, end, per, pro, xyl, phl, col], axis=1)
tf_spec = tf_spec.fillna(0)
In [31]:
## Epidermis (atri, tri, lrc)
celltype1='atri'
celltype2='tri'
celltype3='lrc'
ts = tf_spec[tf_spec['tf_occurance']==3][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==9].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[31]:
| atri_betweenness_centrality | tri_betweenness_centrality | lrc_betweenness_centrality | atri_out_centrality | tri_out_centrality | lrc_out_centrality | atri_in_centrality | tri_in_centrality | lrc_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| ATS | 0.878995 | 0.817592 | 0.813492 | 0.169620 | 0.044968 | 0.248052 | 0.068354 | 0.167024 | 0.255844 | 9 | 12.463941 |
| TMO7 | 0.956564 | 0.015568 | 0.518173 | 0.134177 | 0.014989 | 0.119481 | 0.068354 | 0.404711 | 0.066234 | 9 | 11.298251 |
In [32]:
## atri, tri
celltype1='atri'
celltype2='tri'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[32]:
| atri_betweenness_centrality | tri_betweenness_centrality | atri_out_centrality | tri_out_centrality | atri_in_centrality | tri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|
| AT3G53370 | 0.367615 | 0.971800 | 0.050633 | 0.847966 | 0.086076 | 0.194861 | 6 | 8.518951 |
| RSL4 | 0.013468 | 0.748077 | 0.025316 | 0.708779 | 0.015190 | 0.062099 | 6 | 7.572929 |
| RHD6 | 0.063638 | 0.792567 | 0.020253 | 0.447537 | 0.063291 | 0.182013 | 6 | 7.569300 |
| FIT | 0.954495 | 0.049558 | 0.293671 | 0.036403 | 0.131646 | 0.100642 | 6 | 7.566414 |
| CRF4 | 0.937358 | 0.001705 | 0.091139 | 0.014989 | 0.124051 | 0.145610 | 6 | 7.314852 |
| AT2G28710 | 0.005301 | 0.000758 | 0.359494 | 0.053533 | 0.017722 | 0.027837 | 6 | 6.464645 |
| AT3G09735 | 0.001992 | 0.000018 | 0.030380 | 0.119914 | 0.207595 | 0.029979 | 6 | 6.389878 |
| WRKY61 | 0.000263 | 0.001452 | 0.032911 | 0.107066 | 0.015190 | 0.053533 | 6 | 6.210416 |
| OFP12 | 0.000739 | 0.000758 | 0.035443 | 0.008565 | 0.037975 | 0.017131 | 6 | 6.100611 |
In [33]:
## Atrichoblast specific
celltype = 'atri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[33]:
| atri_betweenness_centrality | atri_out_centrality | atri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| GL2 | 0.985575 | 0.415190 | 0.301266 | 3 | 4.702030 |
| BNQ3 | 0.976907 | 0.139241 | 0.156962 | 3 | 4.273109 |
| TTG2 | 0.969215 | 0.769620 | 0.131646 | 3 | 4.870481 |
| TRY | 0.949579 | 0.093671 | 0.124051 | 3 | 4.167301 |
| AT1G11490 | 0.910711 | 0.020253 | 0.167089 | 3 | 4.098053 |
| MEA | 0.897873 | 0.230380 | 0.027848 | 3 | 4.156101 |
| OFP15 | 0.622978 | 0.005063 | 0.093671 | 3 | 3.721712 |
| AT4G31650 | 0.074921 | 0.146835 | 0.073418 | 3 | 3.295174 |
| BEH4 | 0.071754 | 0.050633 | 0.015190 | 3 | 3.137576 |
| MYB45 | 0.057052 | 0.554430 | 0.032911 | 3 | 3.644394 |
| WRKY45 | 0.024880 | 0.258228 | 0.032911 | 3 | 3.316019 |
| DAR7 | 0.023794 | 0.040506 | 0.007595 | 3 | 3.071895 |
| OFP18 | 0.018576 | 0.286076 | 0.078481 | 3 | 3.383133 |
| NAC78 | 0.010454 | 0.081013 | 0.043038 | 3 | 3.134505 |
| WRKY47 | 0.008822 | 0.030380 | 0.002532 | 3 | 3.041734 |
| AT2G27580 | 0.007672 | 0.015190 | 0.048101 | 3 | 3.070963 |
| MYB64 | 0.007434 | 0.002532 | 0.012658 | 3 | 3.022624 |
| HB4 | 0.002532 | 0.005063 | 0.022785 | 3 | 3.030380 |
| HB24 | 0.000707 | 0.230380 | 0.020253 | 3 | 3.251340 |
| NF-YC10 | 0.000051 | 0.086076 | 0.121519 | 3 | 3.207646 |
| BZS1 | 0.000051 | 0.005063 | 0.022785 | 3 | 3.027900 |
| AT3G17100 | 0.000039 | 0.068354 | 0.096203 | 3 | 3.164596 |
| AT2G18670 | 0.000032 | 0.134177 | 0.020253 | 3 | 3.154463 |
| NPH4 | 0.000026 | 0.017722 | 0.037975 | 3 | 3.055722 |
| AT4G28030 | 0.000006 | 0.035443 | 0.002532 | 3 | 3.037981 |
| FRS1 | 0.000006 | 0.010127 | 0.002532 | 3 | 3.012665 |
In [34]:
## Trichoblast specific
celltype = 'tri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[34]:
| tri_betweenness_centrality | tri_out_centrality | tri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| ESE3 | 0.985291 | 0.107066 | 0.239829 | 3 | 4.332186 |
| RL6 | 0.984450 | 0.137045 | 0.074946 | 3 | 4.196442 |
| AT1G02040 | 0.896118 | 0.042827 | 0.055675 | 3 | 3.994619 |
| LRL3 | 0.889418 | 0.732334 | 0.117773 | 3 | 4.739525 |
| MBF1A | 0.886142 | 0.006424 | 0.357602 | 3 | 4.250168 |
| RSL1 | 0.885462 | 0.111349 | 0.100642 | 3 | 4.097453 |
| RAP2.7 | 0.819926 | 0.010707 | 0.019272 | 3 | 3.849905 |
| ZF1 | 0.810704 | 0.079229 | 0.079229 | 3 | 3.969162 |
| ATMYC1 | 0.772211 | 0.203426 | 0.408994 | 3 | 4.384630 |
| EGL3 | 0.573761 | 0.053533 | 0.158458 | 3 | 3.785752 |
| AT4G09100 | 0.484946 | 0.728051 | 0.081370 | 3 | 4.294368 |
| MED6 | 0.432773 | 0.032120 | 0.021413 | 3 | 3.486307 |
| NPR4 | 0.258705 | 0.008565 | 0.029979 | 3 | 3.297249 |
| PHE1 | 0.188441 | 0.012848 | 0.008565 | 3 | 3.209855 |
| AT5G04390 | 0.150890 | 0.074946 | 0.006424 | 3 | 3.232261 |
| AT4G39160 | 0.123930 | 0.494647 | 0.029979 | 3 | 3.648556 |
| bZIP2 | 0.110136 | 0.070664 | 0.139186 | 3 | 3.319986 |
| AT5G12850 | 0.106961 | 0.006424 | 0.014989 | 3 | 3.128374 |
| RSL2 | 0.017489 | 0.229122 | 0.006424 | 3 | 3.253035 |
| NAC005 | 0.013666 | 0.010707 | 0.107066 | 3 | 3.131439 |
| AT3G05860 | 0.007508 | 0.077088 | 0.008565 | 3 | 3.093162 |
| AT3G51470 | 0.005041 | 0.014989 | 0.027837 | 3 | 3.047867 |
| AT5G11340 | 0.000639 | 0.051392 | 0.055675 | 3 | 3.107705 |
| HFR1 | 0.000625 | 0.059957 | 0.029979 | 3 | 3.090561 |
| AT2G05160 | 0.000588 | 0.291221 | 0.012848 | 3 | 3.304657 |
| RAP2.11 | 0.000482 | 0.044968 | 0.017131 | 3 | 3.062581 |
| AT5G16470 | 0.000028 | 0.042827 | 0.113490 | 3 | 3.156344 |
| KAN | 0.000009 | 0.014989 | 0.062099 | 3 | 3.077097 |
| AT1G04850 | 0.000009 | 0.032120 | 0.042827 | 3 | 3.074956 |
| GL3 | 0.000005 | 0.119914 | 0.027837 | 3 | 3.147756 |
In [35]:
## LRC specific
celltype = 'lrc'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[35]:
| lrc_betweenness_centrality | lrc_out_centrality | lrc_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| CRF2 | 0.977525 | 0.485714 | 0.663636 | 3 | 5.126876 |
| 3xHMG-box2 | 0.972707 | 0.271429 | 0.381818 | 3 | 4.625954 |
| BHLH101 | 0.933266 | 0.097403 | 0.197403 | 3 | 4.228072 |
| AT1G77200 | 0.918153 | 0.072727 | 0.157143 | 3 | 4.148023 |
| RGL2 | 0.910315 | 0.124675 | 0.203896 | 3 | 4.238887 |
| RITF1 | 0.571837 | 0.274026 | 0.132468 | 3 | 3.978331 |
| HDG1 | 0.543788 | 0.083117 | 0.101299 | 3 | 3.728203 |
| ERF9 | 0.522338 | 0.211688 | 0.211688 | 3 | 3.945715 |
| OFP7 | 0.471706 | 0.033766 | 0.062338 | 3 | 3.567809 |
| LBD4 | 0.453937 | 0.123377 | 0.077922 | 3 | 3.655236 |
| CRF11 | 0.431295 | 0.059740 | 0.045455 | 3 | 3.536490 |
| AT2G36930 | 0.413595 | 0.153247 | 0.093506 | 3 | 3.660348 |
| WRKY42 | 0.401435 | 0.068831 | 0.081818 | 3 | 3.552085 |
| HMGB4 | 0.351240 | 0.074026 | 0.084416 | 3 | 3.509682 |
| SDG4 | 0.350435 | 0.036364 | 0.138961 | 3 | 3.525760 |
| HB-2 | 0.347194 | 0.119481 | 0.076623 | 3 | 3.543298 |
| MYB3R-4 | 0.346647 | 0.066234 | 0.081818 | 3 | 3.494699 |
| GRF2 | 0.339344 | 0.127273 | 0.070130 | 3 | 3.536747 |
| IAA33 | 0.338063 | 0.029870 | 0.031169 | 3 | 3.399102 |
| ARF8 | 0.333974 | 0.110390 | 0.027273 | 3 | 3.471636 |
| GATA17L | 0.319665 | 0.118182 | 0.075325 | 3 | 3.513171 |
| FEZ | 0.315569 | 0.153247 | 0.185714 | 3 | 3.654530 |
| CHR1 | 0.277476 | 0.081818 | 0.067532 | 3 | 3.426827 |
| PS1 | 0.273026 | 0.050649 | 0.048052 | 3 | 3.371727 |
| CSDP1 | 0.268927 | 0.093506 | 0.049351 | 3 | 3.411785 |
| 3xHMG-box1 | 0.197599 | 0.032468 | 0.048052 | 3 | 3.278118 |
| WRKY7 | 0.186082 | 0.023377 | 0.024675 | 3 | 3.234134 |
| HTA2 | 0.174330 | 0.055844 | 0.125974 | 3 | 3.356148 |
| GATA7 | 0.102602 | 0.042857 | 0.068831 | 3 | 3.214291 |
| HTA13 | 0.083759 | 0.079221 | 0.098701 | 3 | 3.261681 |
| RR10 | 0.052406 | 0.064935 | 0.049351 | 3 | 3.166691 |
| CHR38 | 0.023318 | 0.070130 | 0.029870 | 3 | 3.123318 |
| MYB51 | 0.020747 | 0.023377 | 0.024675 | 3 | 3.068799 |
| AT2G29065 | 0.009236 | 0.067532 | 0.061039 | 3 | 3.137808 |
| FLP | 0.008203 | 0.025974 | 0.029870 | 3 | 3.064047 |
| JMJ18 | 0.007027 | 0.010390 | 0.074026 | 3 | 3.091443 |
| AT5G12980 | 0.005963 | 0.050649 | 0.038961 | 3 | 3.095574 |
| GATA17 | 0.005840 | 0.067532 | 0.063636 | 3 | 3.137009 |
| AT2G41710 | 0.001672 | 0.028571 | 0.025974 | 3 | 3.056217 |
| AT2G36026 | 0.001647 | 0.037662 | 0.084416 | 3 | 3.123725 |
| HMGB5 | 0.001569 | 0.025974 | 0.041558 | 3 | 3.069101 |
| BRM | 0.000802 | 0.049351 | 0.067532 | 3 | 3.117685 |
| CIB5 | 0.000735 | 0.084416 | 0.031169 | 3 | 3.116319 |
| HDG2 | 0.000483 | 0.048052 | 0.077922 | 3 | 3.126457 |
| WRI1 | 0.000358 | 0.025974 | 0.033766 | 3 | 3.060098 |
| SPL1 | 0.000086 | 0.007792 | 0.079221 | 3 | 3.087099 |
| AT2G35605 | 0.000027 | 0.022078 | 0.033766 | 3 | 3.055871 |
| NAC063 | 0.000017 | 0.011688 | 0.045455 | 3 | 3.057160 |
| AP2 | 0.000008 | 0.042857 | 0.020779 | 3 | 3.063645 |
In [36]:
## Columella specific
celltype = 'col'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[36]:
| col_betweenness_centrality | col_out_centrality | col_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| IAA20 | 0.991821 | 0.374365 | 0.360406 | 3 | 4.726593 |
| NTT | 0.991046 | 0.175127 | 0.455584 | 3 | 4.621756 |
| AT3G60670 | 0.983727 | 0.008883 | 0.427665 | 3 | 4.420275 |
| BPC7 | 0.976530 | 0.024112 | 0.063452 | 3 | 4.064094 |
| AT3G52440 | 0.894872 | 0.324873 | 0.111675 | 3 | 4.331420 |
| ... | ... | ... | ... | ... | ... |
| AT2G44430 | 0.000011 | 0.034264 | 0.048223 | 3 | 3.082499 |
| HSFA8 | 0.000011 | 0.013959 | 0.029188 | 3 | 3.043158 |
| DRIP2 | 0.000008 | 0.062183 | 0.021574 | 3 | 3.083764 |
| AT4G22360 | 0.000005 | 0.035533 | 0.030457 | 3 | 3.065995 |
| AT3G52250 | 0.000003 | 0.036802 | 0.087563 | 3 | 3.124369 |
72 rows × 5 columns
In [37]:
## Ground tissue
celltype1='cor'
celltype2='end'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[37]:
| cor_betweenness_centrality | end_betweenness_centrality | cor_out_centrality | end_out_centrality | cor_in_centrality | end_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|
| JKD | 0.035369 | 0.006561 | 0.426914 | 0.169399 | 0.252900 | 0.137705 | 6 | 7.028849 |
| ETR2 | 0.000281 | 0.038321 | 0.148492 | 0.013115 | 0.088167 | 0.001093 | 6 | 6.289468 |
In [38]:
## Cortex specific
celltype = 'cor'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[38]:
| cor_betweenness_centrality | cor_out_centrality | cor_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| MYB50 | 0.781838 | 0.255220 | 0.069606 | 3 | 4.106664 |
| AT1G72210 | 0.126245 | 0.180974 | 0.294664 | 3 | 3.601883 |
| AT2G38300 | 0.063519 | 0.575406 | 0.529002 | 3 | 4.167927 |
| RGL3 | 0.046323 | 0.327146 | 0.273782 | 3 | 3.647251 |
| MYB86 | 0.042886 | 0.475638 | 0.450116 | 3 | 3.968640 |
| HAM3 | 0.025803 | 0.306265 | 0.220418 | 3 | 3.552485 |
| LRP1 | 0.011601 | 0.320186 | 0.257541 | 3 | 3.589327 |
| GATA16 | 0.006961 | 0.204176 | 0.169374 | 3 | 3.380510 |
| MYBR1 | 0.006707 | 0.034803 | 0.053364 | 3 | 3.094874 |
| WRKY57 | 0.004630 | 0.157773 | 0.248260 | 3 | 3.410662 |
| BZR1 | 0.002309 | 0.127610 | 0.180974 | 3 | 3.310894 |
| AT3G61420 | 0.000529 | 0.060325 | 0.078886 | 3 | 3.139740 |
| AT3G24120 | 0.000237 | 0.174014 | 0.122970 | 3 | 3.297221 |
| EIL1 | 0.000210 | 0.185615 | 0.076566 | 3 | 3.262391 |
| bZIP52 | 0.000081 | 0.067285 | 0.039443 | 3 | 3.106809 |
| JAZ6 | 0.000070 | 0.034803 | 0.106729 | 3 | 3.141601 |
| RR3 | 0.000011 | 0.062645 | 0.048724 | 3 | 3.111380 |
In [39]:
## Endodermis specific
celltype = 'end'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[39]:
| end_betweenness_centrality | end_out_centrality | end_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| MYB74 | 0.802011 | 0.951913 | 0.100546 | 3 | 4.854470 |
| MYB36 | 0.591693 | 0.991257 | 1.000000 | 3 | 5.582950 |
| LSMT-L | 0.056560 | 0.001093 | 0.002186 | 3 | 3.059839 |
| AN3 | 0.022951 | 0.006557 | 0.243716 | 3 | 3.273224 |
| SCR | 0.020775 | 0.160656 | 0.217486 | 3 | 3.398917 |
| AT5G57150 | 0.015549 | 0.451366 | 0.140984 | 3 | 3.607899 |
| chr31 | 0.015436 | 0.158470 | 0.160656 | 3 | 3.334561 |
| BLJ | 0.015301 | 0.629508 | 0.189071 | 3 | 3.833880 |
| AGL67 | 0.012107 | 0.003279 | 0.004372 | 3 | 3.019757 |
| AT2G27930 | 0.011930 | 0.077596 | 0.004372 | 3 | 3.093897 |
| HSFB4 | 0.005464 | 0.003279 | 0.137705 | 3 | 3.146448 |
| MYB68 | 0.004473 | 0.781421 | 0.109290 | 3 | 3.895184 |
| TLP11 | 0.002175 | 0.121311 | 0.087432 | 3 | 3.210918 |
| AT3G56230 | 0.001820 | 0.718033 | 0.067760 | 3 | 3.787612 |
| LAF1 | 0.001412 | 0.029508 | 0.072131 | 3 | 3.103052 |
| AGL16 | 0.001108 | 0.056831 | 0.004372 | 3 | 3.062311 |
| AT3G23690 | 0.001095 | 0.168306 | 0.088525 | 3 | 3.257926 |
| HRS1 | 0.001076 | 0.061202 | 0.003279 | 3 | 3.065557 |
| MYB122 | 0.000729 | 0.034973 | 0.039344 | 3 | 3.075046 |
| NAC003 | 0.000122 | 0.001093 | 0.005464 | 3 | 3.006679 |
| STO | 0.000014 | 0.018579 | 0.028415 | 3 | 3.047009 |
| AT1G48040 | 0.000013 | 0.002186 | 0.005464 | 3 | 3.007663 |
| ZFN1 | 0.000012 | 0.017486 | 0.026230 | 3 | 3.043728 |
| ABF2 | 0.000008 | 0.006557 | 0.020765 | 3 | 3.027331 |
| MYB70 | 0.000006 | 0.013115 | 0.025137 | 3 | 3.038257 |
| AT3G04930 | 0.000006 | 0.004372 | 0.019672 | 3 | 3.024050 |
| FBH3 | 0.000006 | 0.007650 | 0.020765 | 3 | 3.028421 |
| ERF15 | 0.000002 | 0.066667 | 0.018579 | 3 | 3.085248 |
| AT5G23280 | 0.000001 | 0.038251 | 0.004372 | 3 | 3.042624 |
In [40]:
## Stele
celltype1='per'
celltype2='pro'
celltype3='xyl'
celltype4='phl'
ts = tf_spec[tf_spec['tf_occurance']==4][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype4+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype4+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality', celltype4+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==12].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[40]:
| per_betweenness_centrality | pro_betweenness_centrality | xyl_betweenness_centrality | phl_betweenness_centrality | per_out_centrality | pro_out_centrality | xyl_out_centrality | phl_out_centrality | per_in_centrality | pro_in_centrality | xyl_in_centrality | phl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AT3G43430 | 0.955343 | 0.067799 | 0.002557 | 0.970183 | 0.834906 | 0.959057 | 0.039897 | 0.463748 | 0.957547 | 0.264268 | 0.043758 | 0.125855 | 12 | 17.684920 |
| AT1G61660 | 0.000005 | 0.959442 | 0.532376 | 0.887866 | 0.075472 | 0.800248 | 0.045045 | 0.073871 | 0.006289 | 0.313896 | 0.137709 | 0.041040 | 12 | 15.873259 |
| HB-8 | 0.001248 | 0.057658 | 0.277498 | 0.540318 | 0.003145 | 0.864764 | 0.942085 | 0.008208 | 0.017296 | 0.172457 | 0.666667 | 0.015048 | 12 | 15.566390 |
| ATAUX2-11 | 0.652630 | 0.010032 | 0.996826 | 0.001392 | 0.066038 | 0.841191 | 0.374517 | 0.013680 | 0.037736 | 0.176179 | 0.155727 | 0.038304 | 12 | 15.364251 |
| IAA12 | 0.066956 | 0.982903 | 0.934685 | 0.000187 | 0.004717 | 0.937965 | 0.051480 | 0.013680 | 0.007862 | 0.045906 | 0.038610 | 0.015048 | 12 | 15.099999 |
| UNE12 | 0.008565 | 0.001241 | 0.001438 | 0.076272 | 0.355346 | 0.334988 | 0.120978 | 0.079343 | 0.251572 | 0.332506 | 0.178893 | 0.023256 | 12 | 13.764398 |
In [41]:
## Pericycle
celltype = 'per'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[41]:
| per_betweenness_centrality | per_out_centrality | per_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| SAC51 | 0.870519 | 0.094340 | 0.077044 | 3 | 4.041903 |
| NAC047 | 0.861531 | 0.113208 | 0.039308 | 3 | 4.014047 |
| AT2G42040 | 0.790041 | 0.055031 | 0.018868 | 3 | 3.863940 |
| AT3G04850 | 0.654831 | 0.028302 | 0.025157 | 3 | 3.708290 |
| HB-7 | 0.507594 | 0.265723 | 0.452830 | 3 | 4.226148 |
| HMGB6 | 0.461145 | 0.006289 | 0.092767 | 3 | 3.560202 |
| AT5G06550 | 0.177965 | 0.004717 | 0.009434 | 3 | 3.192116 |
| GATA23 | 0.164480 | 0.048742 | 0.177673 | 3 | 3.390895 |
| AT1G27050 | 0.053375 | 0.023585 | 0.003145 | 3 | 3.080104 |
| IDD7 | 0.018125 | 0.018868 | 0.006289 | 3 | 3.043282 |
| LBD14 | 0.014735 | 0.080189 | 0.022013 | 3 | 3.116937 |
| AT3G21330 | 0.011826 | 0.003145 | 0.080189 | 3 | 3.095159 |
| NUC | 0.008624 | 0.152516 | 0.055031 | 3 | 3.216171 |
| TRFL10 | 0.007812 | 0.044025 | 0.011006 | 3 | 3.062844 |
| AT4G20970 | 0.007443 | 0.058176 | 0.015723 | 3 | 3.081343 |
| AT2G20100 | 0.006364 | 0.154088 | 0.009434 | 3 | 3.169886 |
| MGP | 0.003972 | 0.053459 | 0.110063 | 3 | 3.167494 |
| AT2G35430 | 0.003053 | 0.070755 | 0.015723 | 3 | 3.089531 |
| GRP2 | 0.001582 | 0.017296 | 0.306604 | 3 | 3.325482 |
| SHR | 0.001572 | 0.055031 | 0.091195 | 3 | 3.147799 |
| RSZ22 | 0.001572 | 0.031447 | 0.059748 | 3 | 3.092767 |
| IAA18 | 0.000567 | 0.034591 | 0.006289 | 3 | 3.041448 |
| AT1G27660 | 0.000245 | 0.127358 | 0.007862 | 3 | 3.135465 |
| AT4G00940 | 0.000015 | 0.023585 | 0.025157 | 3 | 3.048757 |
| CDF3 | 0.000007 | 0.029874 | 0.003145 | 3 | 3.033026 |
| IDD11 | 0.000002 | 0.136792 | 0.072327 | 3 | 3.209122 |
| DEL2 | 0.000002 | 0.007862 | 0.003145 | 3 | 3.011009 |
In [42]:
## Procambium
celltype = 'pro'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[42]:
| pro_betweenness_centrality | pro_out_centrality | pro_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| HAT3 | 0.955693 | 0.192308 | 0.018610 | 3 | 4.166611 |
| AT5G50010 | 0.955669 | 0.697270 | 0.024814 | 3 | 4.677754 |
| NAC045 | 0.043551 | 0.047146 | 0.008685 | 3 | 3.099382 |
| AT3G10040 | 0.031677 | 0.012407 | 0.011166 | 3 | 3.055250 |
| REV | 0.023168 | 0.545906 | 0.101737 | 3 | 3.670811 |
| AT2G29660 | 0.012140 | 0.540943 | 0.035980 | 3 | 3.589063 |
| HB18 | 0.003656 | 0.115385 | 0.059553 | 3 | 3.178594 |
| AT1G75490 | 0.001313 | 0.127792 | 0.183623 | 3 | 3.312728 |
| AT4G17780 | 0.001304 | 0.054591 | 0.012407 | 3 | 3.068301 |
| BZIP24 | 0.001262 | 0.052109 | 0.013648 | 3 | 3.067019 |
| ERF12 | 0.001241 | 0.034739 | 0.193548 | 3 | 3.229529 |
| AT5G24320 | 0.001241 | 0.009926 | 0.099256 | 3 | 3.110422 |
| AT5G60142 | 0.000162 | 0.023573 | 0.002481 | 3 | 3.026216 |
| HYH | 0.000034 | 0.193548 | 0.120347 | 3 | 3.313930 |
| GATA26 | 0.000032 | 0.016129 | 0.073201 | 3 | 3.089362 |
| AT1G18960 | 0.000020 | 0.002481 | 0.006203 | 3 | 3.008705 |
| BPC1 | 0.000015 | 0.003722 | 0.089330 | 3 | 3.093068 |
| LUH | 0.000014 | 0.019851 | 0.063275 | 3 | 3.083140 |
| SHY2 | 0.000014 | 0.263027 | 0.027295 | 3 | 3.290336 |
| OBP4 | 0.000006 | 0.220844 | 0.055831 | 3 | 3.276681 |
| TLP3 | 0.000003 | 0.032258 | 0.069479 | 3 | 3.101740 |
| VRN1 | 0.000003 | 0.060794 | 0.055831 | 3 | 3.116628 |
| AL4 | 0.000002 | 0.007444 | 0.095533 | 3 | 3.102979 |
In [43]:
## Xylem
celltype = 'xyl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[43]:
| xyl_betweenness_centrality | xyl_out_centrality | xyl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| MYB83 | 0.990722 | 0.737452 | 0.117117 | 3 | 4.845291 |
| VND1 | 0.982980 | 0.541828 | 0.060489 | 3 | 4.585297 |
| BEE2 | 0.982901 | 0.236808 | 0.097812 | 3 | 4.317521 |
| XND1 | 0.963602 | 0.889318 | 0.184041 | 3 | 5.036961 |
| AT4G13620 | 0.962645 | 0.050193 | 0.037323 | 3 | 4.050162 |
| VND7 | 0.942810 | 0.790219 | 0.054054 | 3 | 4.787083 |
| SHP1 | 0.933945 | 0.234234 | 0.114543 | 3 | 4.282722 |
| LBD18 | 0.923374 | 0.079794 | 0.129987 | 3 | 4.133155 |
| VND5 | 0.560078 | 0.751609 | 0.036036 | 3 | 4.347723 |
| MYB46 | 0.323950 | 0.903475 | 0.131274 | 3 | 4.358699 |
| VND2 | 0.107889 | 0.988417 | 0.343629 | 3 | 4.439936 |
| AT1G66810 | 0.056570 | 0.182754 | 0.030888 | 3 | 3.270212 |
| MYB99 | 0.038995 | 0.117117 | 0.002574 | 3 | 3.158686 |
| AT2G04845 | 0.033839 | 0.223938 | 0.025740 | 3 | 3.283517 |
| ARR9 | 0.029427 | 0.007722 | 0.010296 | 3 | 3.047445 |
| VND4 | 0.016379 | 0.768340 | 0.118404 | 3 | 3.903123 |
| AT1G26610 | 0.015545 | 0.060489 | 0.083655 | 3 | 3.159689 |
| TCP10 | 0.014767 | 0.018018 | 0.019305 | 3 | 3.052090 |
| BEL10 | 0.014401 | 0.046332 | 0.036036 | 3 | 3.096769 |
| MYB25 | 0.013923 | 0.027027 | 0.045045 | 3 | 3.085995 |
| HB31 | 0.008100 | 0.051480 | 0.019305 | 3 | 3.078885 |
| AGL58 | 0.007686 | 0.006435 | 0.100386 | 3 | 3.114507 |
| BZIP49 | 0.005297 | 0.023166 | 0.029601 | 3 | 3.058064 |
| PHV | 0.003886 | 0.111969 | 0.189189 | 3 | 3.305044 |
| AT1G68200 | 0.002826 | 0.590734 | 0.033462 | 3 | 3.627022 |
| JLO | 0.002516 | 0.057915 | 0.081081 | 3 | 3.141512 |
| IAA6 | 0.002476 | 0.886744 | 0.050193 | 3 | 3.939413 |
| AT5G18090 | 0.002357 | 0.023166 | 0.016731 | 3 | 3.042254 |
| SPL7 | 0.001285 | 0.048906 | 0.041184 | 3 | 3.091375 |
| AT5G06770 | 0.000214 | 0.003861 | 0.007722 | 3 | 3.011797 |
| TCP20 | 0.000071 | 0.151866 | 0.061776 | 3 | 3.213714 |
| AT2G01818 | 0.000050 | 0.002574 | 0.105534 | 3 | 3.108158 |
| VND6 | 0.000028 | 0.141570 | 0.042471 | 3 | 3.184069 |
| AT1G03350 | 0.000020 | 0.063063 | 0.056628 | 3 | 3.119711 |
| IWS1 | 0.000020 | 0.027027 | 0.052767 | 3 | 3.079814 |
In [44]:
## Phloem
celltype = 'phl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[44]:
| phl_betweenness_centrality | phl_out_centrality | phl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| NAC020 | 0.980417 | 0.221614 | 0.154583 | 3 | 4.356614 |
| AT5G02460 | 0.974379 | 0.083447 | 0.160055 | 3 | 4.217881 |
| GATA20 | 0.972391 | 0.105335 | 0.110807 | 3 | 4.188533 |
| AT2G44940 | 0.950758 | 0.357045 | 0.083447 | 3 | 4.391250 |
| AT5G41380 | 0.905144 | 0.482900 | 0.283174 | 3 | 4.671218 |
| WOX2 | 0.850503 | 0.010944 | 0.049248 | 3 | 3.910695 |
| APL | 0.824045 | 0.982216 | 1.001368 | 3 | 5.807629 |
| NAC080 | 0.650170 | 0.001368 | 0.015048 | 3 | 3.666585 |
| AT2G03500 | 0.235905 | 0.658003 | 0.659371 | 3 | 4.553278 |
| AT3G12730 | 0.229208 | 0.820793 | 0.838577 | 3 | 4.888578 |
| AS1 | 0.161295 | 0.050616 | 0.004104 | 3 | 3.216015 |
| CRF1 | 0.156860 | 0.071135 | 0.098495 | 3 | 3.326490 |
| DAR2 | 0.129382 | 0.625171 | 0.766074 | 3 | 4.520627 |
| NAC057 | 0.070987 | 0.499316 | 0.068399 | 3 | 3.638703 |
| AT1G02030 | 0.058578 | 0.009576 | 0.020520 | 3 | 3.088674 |
| NF-YB3 | 0.023882 | 0.005472 | 0.001368 | 3 | 3.030722 |
| AT4G37180 | 0.021159 | 0.580027 | 0.261286 | 3 | 3.862472 |
| HCA2 | 0.012876 | 0.396717 | 0.082079 | 3 | 3.491672 |
| AT3G22100 | 0.011326 | 0.002736 | 0.045144 | 3 | 3.059206 |
| DOF6 | 0.005933 | 0.685363 | 0.132695 | 3 | 3.823990 |
| SPL13A | 0.005030 | 0.009576 | 0.002736 | 3 | 3.017342 |
| PIF7 | 0.004805 | 0.012312 | 0.024624 | 3 | 3.041741 |
| RSZ22a | 0.002374 | 0.005472 | 0.061560 | 3 | 3.069406 |
| SVP | 0.001578 | 0.099863 | 0.001368 | 3 | 3.102809 |
| WRKY32 | 0.001327 | 0.186047 | 0.097127 | 3 | 3.284500 |
| REM22 | 0.001327 | 0.041040 | 0.091655 | 3 | 3.134022 |
| RAP2.2 | 0.001081 | 0.031464 | 0.058824 | 3 | 3.091369 |
| AT1G63820 | 0.000944 | 0.310534 | 0.139535 | 3 | 3.451013 |
| VOZ1 | 0.000723 | 0.195622 | 0.077975 | 3 | 3.274321 |
| PRT1 | 0.000588 | 0.084815 | 0.028728 | 3 | 3.114132 |
| NAC2 | 0.000515 | 0.213406 | 0.035568 | 3 | 3.249489 |
| bZIP19 | 0.000386 | 0.184679 | 0.158687 | 3 | 3.343751 |
| ET2 | 0.000350 | 0.031464 | 0.010944 | 3 | 3.042758 |
| AT2G45460 | 0.000142 | 0.046512 | 0.002736 | 3 | 3.049390 |
| BRH1 | 0.000141 | 0.094391 | 0.035568 | 3 | 3.130100 |
| ARF17 | 0.000090 | 0.017784 | 0.015048 | 3 | 3.032922 |
| EDA16 | 0.000017 | 0.034200 | 0.035568 | 3 | 3.069784 |
| AT5G28040 | 0.000017 | 0.019152 | 0.058824 | 3 | 3.077992 |
| AT2G23780 | 0.000017 | 0.015048 | 0.064295 | 3 | 3.079360 |
| GT-1 | 0.000017 | 0.016416 | 0.062927 | 3 | 3.079360 |
| TLP10 | 0.000017 | 0.025992 | 0.050616 | 3 | 3.076624 |
| ARF11 | 0.000002 | 0.036936 | 0.102599 | 3 | 3.139537 |
Search for individual genes¶
In [45]:
gene = 'SHR'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[45]:
| tf_occurance | end_degree_centrality | end_out_centrality | end_in_centrality | end_closeness_centrality | end_eigenvector_centrality | per_degree_centrality | per_out_centrality | per_in_centrality | per_betweenness_centrality | per_closeness_centrality | per_eigenvector_centrality | pro_degree_centrality | pro_out_centrality | pro_in_centrality | pro_closeness_centrality | pro_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SHR | 1.0 | 0.021858 | 0.004372 | 0.017486 | 0.000344 | 0.021762 | 0.146226 | 0.055031 | 0.091195 | 0.001572 | 0.000366 | 0.052812 | 0.060794 | 0.044665 | 0.016129 | 0.000388 | 0.021401 |
In [46]:
gene = 'BLJ'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[46]:
| tf_occurance | cor_degree_centrality | cor_out_centrality | cor_in_centrality | cor_closeness_centrality | cor_eigenvector_centrality | end_degree_centrality | end_out_centrality | end_in_centrality | end_betweenness_centrality | end_closeness_centrality | end_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BLJ | 1.0 | 0.039443 | 0.025522 | 0.013921 | 0.000165 | 0.019281 | 0.818579 | 0.629508 | 0.189071 | 0.015301 | 0.000541 | 0.157907 |
In [47]:
gene = 'JKD'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[47]:
| tf_occurance | cor_degree_centrality | cor_out_centrality | cor_in_centrality | cor_betweenness_centrality | cor_closeness_centrality | cor_eigenvector_centrality | end_degree_centrality | end_out_centrality | end_in_centrality | end_betweenness_centrality | end_closeness_centrality | end_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| JKD | 2.0 | 0.679814 | 0.426914 | 0.2529 | 0.035369 | 0.000269 | 0.134281 | 0.307104 | 0.169399 | 0.137705 | 0.006561 | 0.000525 | 0.092102 |
In [48]:
gene = 'RVN'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[48]:
| end_degree_centrality | end_out_centrality | end_in_centrality | end_closeness_centrality | end_eigenvector_centrality | |
|---|---|---|---|---|---|
| RVN | 0.07541 | 0.00765 | 0.06776 | 0.000462 | 0.04864 |
In [49]:
gene = 'BIB'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[49]:
| end_degree_centrality | end_out_centrality | end_in_centrality | end_closeness_centrality | end_eigenvector_centrality | |
|---|---|---|---|---|---|
| BIB | 0.185792 | 0.111475 | 0.074317 | 0.000472 | 0.075902 |
In [50]:
gene = 'IME'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[50]:
In [51]:
gene = 'MYB66'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[51]:
In [52]:
gene = 'GL2'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[52]:
| tf_occurance | atri_degree_centrality | atri_out_centrality | atri_in_centrality | atri_betweenness_centrality | atri_closeness_centrality | atri_eigenvector_centrality | lrc_degree_centrality | lrc_out_centrality | lrc_in_centrality | lrc_closeness_centrality | lrc_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| GL2 | 1.0 | 0.716456 | 0.41519 | 0.301266 | 0.985575 | 0.000545 | 0.142819 | 0.002597 | 0.001299 | 0.001299 | 0.000382 | 0.002241 |
In [53]:
tf_spec.to_csv('TF_GRN_centrality_t3-t5_zscore3.csv', index=True)
In [ ]: